home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
ftp.mactech.com 2010
/
ftp.mactech.com.tar
/
ftp.mactech.com
/
machack
/
Hacks97
/
NewsTicker.sit
/
NewsTicker
/
source code
/
Extractors
/
NewsComExtractor.cp
< prev
next >
Wrap
Text File
|
1997-06-26
|
4KB
|
167 lines
/*------------------------------------------------------------------------------
#
# NewsTicker, my Hack for 1997
#
# NewsComExtractor.cp - Derived from HTMLExtractor, we get passed the tokens
# and try to recognize headlines among them. We parse
# the page "www.news.com", the technical news page of
# c|net.com.
#
------------------------------------------------------------------------------*/
#include <string.h>
#include "TickerGlobals.h"
#include "NewsComExtractor.h"
#include "HTMLExtractor.h"
// Refresh every 20 minutes (1200 seconds; the value is added to the result
// of GetDateTime() when the next refresh time is computed)
#define kNewsPeriod 1200
// Address handed to the HTMLExtractor base-class constructor
#define kNewsAddress "www.news.com"
// Globals for the News.Com extractor
// Time before which MustReloadNewsCom() answers false. Starts at 0, so the
// first check always asks for a load; the NEWSExtractor constructor then
// pushes it to "now + kNewsPeriod".
unsigned long gNewsNextTime = 0;
//
// NEWSExtractor - HTMLExtractor subclass that watches the token stream of
// the News.Com page and turns recognized headlines into entries.
// Recognition is driven by a small state machine (see NewsParser).
//
class NEWSExtractor : public HTMLExtractor
{
public:
	// Sets up the extractor and schedules the next refresh time.
	NEWSExtractor(sMyDataPtr theDataPtr);

	// Nothing of our own to release.
	virtual ~NEWSExtractor() {}

	// Receives one token at a time; isCommand tells tags from plain text.
	virtual void HandleToken(char* string, short numchars, Boolean isCommand);

protected:
	// States of the headline recognizer.
	enum NewsParser
	{
		knpParsing,					// idle, waiting for something interesting

		// Text headlines are <f><strong><a>headline
		knpHasFont,					// saw <FONT
		knpHasStrong,				// not used by HandleToken in this file
		knpHasLinkAndStrong,		// saw <FONT then <STRONG>, now waiting for <A
		knpHasAllForText,			// saw the link too, the next text is the headline
		knpWaitingForParagraph,		// headline taken, skip everything until <P>

		// Graphic headlines are <a><img>
		knpHasLink					// saw <A inside a table cell, waiting for <IMG
	};

	NewsParser	mfCurrentState;		// where the recognizer currently stands
	Str255		mfTheURL;			// URL of the most recently accepted link
	Boolean		mfInTD;				// true while we are between <TD and </TD
};
//
// Constructor - hands the News.Com address to the HTMLExtractor base class
// (the meaning of the 1000 argument is defined by HTMLExtractor and is not
// visible from this file), puts the recognizer in its idle state and
// schedules the next refresh kNewsPeriod from now.
//
NEWSExtractor::NEWSExtractor(sMyDataPtr theDataPtr)
	: HTMLExtractor(kNewsAddress, 1000, theDataPtr),
	  mfCurrentState(knpParsing),	// nothing recognized yet
	  mfInTD(false)					// not inside a table cell
{
	unsigned long currentTime;

	GetDateTime(&currentTime);

	// The next reload becomes due one refresh period after this load started
	gNewsNextTime = currentTime + kNewsPeriod;
}
//
// HandleToken - feeds one token of the page into the headline recognizer.
//
//   string     the token text (a tag when isCommand is true, plain text otherwise)
//   numchars   number of characters of 'string' to use for plain text
//   isCommand  true for an HTML tag, false for text between tags
//
// Two kinds of headline are recognized:
//   text:    <FONT ...> <STRONG> <A ...> headline text
//   graphic: <A ...> <IMG ... ALT=...> while inside a table cell
// Every recognized headline is passed to AddEntry() together with the URL
// remembered from its <A> tag.
//
// NOTE(review): MyCompareStr is presumably a "token starts with" test - its
// definition is not visible in this file.
//
void NEWSExtractor::HandleToken(char* string, short numchars, Boolean isCommand)
{
	Str255 thestr;

	if (isCommand)
	{
		// Table delimiters mark the image links. These checks run before the
		// state switch, so the same tag is also seen by the switch below.
		if (MyCompareStr(string, "<TD "))
			mfInTD = true;
		// NOTE(review): the pattern ends with a space; whether it matches a
		// bare "</TD>" depends on MyCompareStr - confirm.
		if (MyCompareStr(string, "</TD "))
		{
			mfInTD = false;
			mfCurrentState = knpParsing;
		}

		switch (mfCurrentState)
		{
			case knpParsing:				// from nothing, we want FONT or A
				if (MyCompareStr(string, "<FONT "))
					mfCurrentState = knpHasFont;
				else if (MyCompareStr(string, "<A ") && mfInTD)
				{
					// Links only start a graphic headline inside a table cell
					if (HTMLExtractor::ParseGoodURL(string+2, mfTheURL))
					{
						mfCurrentState = knpHasLink;
					}
					else mfCurrentState = knpParsing;
				}
				break;

			case knpHasFont:				// for this, we only want STRONG
				if (MyCompareStr(string, "<STRONG>"))
					mfCurrentState = knpHasLinkAndStrong;
				else mfCurrentState = knpParsing;
				break;

			case knpHasLinkAndStrong:		// for this, we only want <A>
				if (MyCompareStr(string, "<A "))
				{
					if (HTMLExtractor::ParseGoodURL(string+2, mfTheURL))
					{
						mfCurrentState = knpHasAllForText;
					}
					else mfCurrentState = knpParsing;
				}
				else mfCurrentState = knpParsing;
				break;

			case knpHasAllForText:
				mfCurrentState = knpParsing;	// any tag from this position is a failure
				break;

			case knpWaitingForParagraph:	// from now on, we're only waiting for a <P>
				if (MyCompareStr(string, "<P>"))
					mfCurrentState = knpParsing;
				break;

			case knpHasLink:				// for this, we only want an IMG, if there's an ALT text
				if (MyCompareStr(string, "<IMG "))
				{
					size_t altLength;

					// Start from an empty C string: if FindATag leaves the
					// buffer alone when there is no ALT attribute, we must
					// not measure (and publish) uninitialized stack bytes.
					thestr[1] = 0;
					FindATag(string+4, (char*)&thestr[1], "ALT");

					// Build the Pascal length byte; clamp so it cannot wrap
					altLength = strlen((char*)&thestr[1]);
					if (altLength > 255)
						altLength = 255;
					thestr[0] = (unsigned char)altLength;

					if (thestr[0] > 0)
						AddEntry(thestr, mfTheURL);
				}
				mfCurrentState = knpParsing;	// with or without an image, start over
				break;
		}
	}
	else
	{
		if (mfCurrentState == knpHasAllForText)	// OK, we've got a headline!
		{
			// A Str255 holds at most 255 characters after its length byte
			if (numchars > 255)
				numchars = 255;
			thestr[0] = numchars;
			BlockMove(string, &thestr[1], numchars);

			// Add the entry
			AddEntry(thestr, mfTheURL);
			mfCurrentState = knpWaitingForParagraph;
		}
		else if (mfCurrentState != knpWaitingForParagraph)
			mfCurrentState = knpParsing;	// and wait for the next headline
	}
}
//
// LoadNewsCom - runs one complete pass over the News.Com page: builds an
// extractor, lets it read its entries, and disposes of it again.
// Constructing the extractor also reschedules gNewsNextTime.
//
void LoadNewsCom(sMyDataPtr gGlobalsPtr)
{
	NEWSExtractor* extractor = new NEWSExtractor(gGlobalsPtr);

	extractor->ReadEntries();
	delete extractor;

	// Back to the standard arrow cursor
	// (presumably the read changes the cursor - not visible from here)
	InitCursor();
}
//
// MustReloadNewsCom - tells the caller whether the News.Com page is due for
// a reload: true once the current time has reached gNewsNextTime, false
// before that. The gGlobalsPtr argument is not used here.
//
Boolean MustReloadNewsCom(sMyDataPtr gGlobalsPtr)
{
	unsigned long currentTime;

	GetDateTime(&currentTime);

	// Due as soon as the scheduled time has been reached
	return (currentTime >= gNewsNextTime) ? true : false;
}